library("rstudioapi")     
##Set working directory
setwd(dirname(getActiveDocumentContext()$path))
getwd()
library("readxl") 
require(miceadds)
library(data.table)
library(tidyverse)
library(DescTools)
library(htmlTable)
library(stargazer)
library(estimatr)
library(tidyverse)
library(DescTools)
require(dplyr)

load("FinalFinal20062023Full1-6.RData")
load("FinalFinalSP20062023Full1-6.RData")

# added by AGB: centre and scale age and income
library(dplyr)
final_finalV2 = mutate(final_finalV2, 
                       Agec = (Age - 30)/10, # per 10 year age
                       Incomec = (Income - 100)/50)

# Ad D1: 
# Create an additional variable R for each individual 
final_finalV2$R = ifelse(is.na(final_finalV2$ActVacApril), 0, 
                         ifelse(final_finalV2$ActVacApril == 1 |  final_finalV2$ActVacApril == 0, 1,0))

table(final_finalV2$R)

# Data preparation: 
fin = final_finalV2
fin$CDC_p = ifelse(fin$individual_treatment == "CDC Health", 1, 
                   ifelse(fin$individual_treatment == "Placebo", 0, NA))
fin$Low_p = ifelse(fin$individual_treatment == "Low Cash", 1, 
                   ifelse(fin$individual_treatment == "Placebo", 0, NA))
fin$High_p = ifelse(fin$individual_treatment == "High Cash", 1, 
                   ifelse(fin$individual_treatment == "Placebo", 0, NA))

#### CDC: 
mod1_cdc = glm(R ~ CDC_p + 
               vaccine_intention + 
                 Gender + Agec + 
                 Education + Incomec + Employed, 
               data = fin,
               family=binomial)
preds_mod_cdc = (predict(mod1_cdc, newdata=fin, type="response"))
pred_cdc = na.omit(preds_mod_cdc)

t.test(
  na.omit(as.vector(preds_mod_cdc[fin$CDC_p == 1])),
  na.omit(as.vector(preds_mod_cdc[fin$CDC_p == 0])),
  na.rm = TRUE, var.equal = TRUE
)

### Low Cash
mod1_Low = glm(R ~ Low_p + 
               vaccine_intention + 
                 Gender + Agec + 
                 Education + Incomec + Employed, 
             data = fin,
             family=binomial)
summary(mod1_Low)
preds_mod_Low = (predict(mod1_Low, newdata=fin, type="response"))
pred_Low = na.omit(preds_mod_Low)

t.test(
  na.omit(as.vector(preds_mod_Low[fin$Low_p == 1])),
  na.omit(as.vector(preds_mod_Low[fin$Low_p == 0])),
  na.rm = TRUE, var.equal = TRUE
)

### High Cash
finHigh = fin[-which(is.na(fin$High_p)),]


mod1_High = glm(R ~ High_p + 
                 vaccine_intention + 
                  Gender + Agec + 
                  Education + Incomec + Employed, 
               data = finHigh,
               family=binomial)
summary(mod1_High)
preds_mod_High = (predict(mod1_High, newdata=finHigh, type="response"))
pred_High = na.omit(preds_mod_High)
hist(pred_High)

t.test(
  na.omit(as.vector(preds_mod_High[finHigh$High_p == 1])),
  na.omit(as.vector(preds_mod_High[finHigh$High_p == 0])),
  na.rm = TRUE, var.equal = TRUE
)


mod1_All = glm(R ~ individual_treatment, 
                data = fin,
                family=binomial)
summary(mod1_All)

mod2_All = glm(R ~ individual_treatment + 
                  vaccine_intention + 
                 Gender + Agec + 
                 Education + Incomec + Employed, 
                data = fin,
                family=binomial)
summary(mod2_All)
nobs(mod2_All)
# Fixed effects: 
library(fixest)
mod3_All = feglm(R ~ individual_treatment + 
                     vaccine_intention + 
                     Gender + Agec + 
                     Education + Incomec + Employed | Q123, 
                     data = fin,
                     family=binomial)
summary(mod3_All)

### Create a table combining: 
# mod1, mod2 and mod3: 
mod1_ov = cbind(coef(mod1_All),confint(mod1_All))
mod2_ov = cbind(coef(mod2_All),confint(mod2_All))
mod3_ov = cbind(coef(mod3_All),confint(mod3_All))

col1 = c()
for (i in c(1:dim(mod1_ov)[1])){
  col1[i] = as.character(paste(format(round(mod1_ov[i,1],2), nsmall = 2),paste("(",paste(format(round(mod1_ov[i,2],2), nsmall = 2), format(round(mod1_ov[i,3],2), nsmall = 2), sep = ", "),")", sep ="")))
}
col1 = data.frame(col1);rownames(col1) = rownames(mod1_ov);colnames(col1) = "Model1"

col2 = c()
for (i in c(1:dim(mod2_ov)[1])){
  col2[i] = as.character(paste(format(round(mod2_ov[i,1],2),nsmall = 2),paste("(",paste(format(round(mod2_ov[i,2],2), nsmall = 2), format(round(mod2_ov[i,3],2),nsmall = 2), sep = ", "),")", sep ="")))
}
col2 = data.frame(col2);rownames(col2) = rownames(mod2_ov);colnames(col2) = "Model2"

col3 = c()
for (i in c(1:dim(mod3_ov)[1])){
  col3[i] = as.character(paste(format(round(mod3_ov[i,1],2),nsmall = 2),paste("(",paste(format(round(mod3_ov[i,2],2), nsmall = 2), format(round(mod3_ov[i,3],2),nsmall = 2), sep = ", "),")", sep ="")))
}
col3 = data.frame(col3);rownames(col3) = rownames(mod3_ov);colnames(col3) = "Model3"

# Basic table with only coefficients: 
tab_st = merge(col1, col2, by = 0, all = TRUE)
rownames(tab_st) = tab_st[,1]
tab_st[,1] = NULL
tab_st = merge(tab_st, col3, by = 0, all = TRUE)
rownames(tab_st) = tab_st[,1]
tab_st[,1] = NULL

no.obs = c(nobs(mod1_All), nobs(mod2_All), nobs(mod3_All))
aic_mod = round(c(summary(mod1_All)$aic, summary(mod2_All)$aic, AIC(mod3_All)),2)
log_lik = round(c(as.vector(logLik(mod1_All)), as.vector(logLik(mod2_All)), as.vector(logLik(mod3_All))),2)
mod_spec = data.frame(rbind(no.obs, aic_mod, log_lik))
colnames(mod_spec) = c("Model1", "Model2", "Model3")

tab_st2 = rbind(tab_st, mod_spec)
rownames(tab_st2) = c(rownames(tab_st), "Observations", "Akaike Inf. Crit.", "Log Likelihood")
rownames(tab_st2)[c(1,3,4,5,7,8,9,10,11,12)] = c("Intercept","High-Educate","Low-Educate", "Medium-Educate", "Male",
                                       "Mean-Food","Health", "High Cash", "Low Cash", "Vaccine Intention")
output = tab_st2[c(11,10,9,7,2,3,5,4,8,6,12,1,13,14,15),]

## Table 13: Attrition Table
stargazer(output, type = "text",
          summary = FALSE, out="ExtData_Table4_PanelB.tex")

